In [2]:
%matplotlib inline

In [3]:
import networkx as nx
print nx.__version__


1.8.1

First, let's define a base undirected acyclic graph. We're assuming these relationships are defined externally.


In [11]:
g = nx.Graph()
g.add_edge('otu1', 'int1', weight=0.1)
g.add_edge('otu2', 'int1', weight=0.2)
g.add_edge('otu3', 'int2', weight=0.5)
g.add_edge('otu4', 'int2', weight=0.7)
g.add_edge('int1', 'int2', weight=0.3)

In [12]:
nx.draw(g)


Next, let's setup our IDs and the corresponding counts for each sample.


In [13]:
otu_ids = ['otu1', 'otu2', 'otu3', 'otu4']
s1 = [1, 1, 0, 0]
s2 = [1, 1, 1, 0]

In [14]:
def _get_all_nodes(graph, tips):
    """Get all the nodes that connect the tips"""
    nodes = set()
    for i in range(len(tips) - 1):
        nodes.update(set(nx.shortest_path(graph, source=tips[i], target=tips[i+1])))
    return nodes

def unweighted_unifrac_uag(u_counts, v_counts, otu_ids, graph):
    """Compute unweighted unifrac over an undirected acyclic graph"""
    u_ids = [i for u, i in zip(u_counts, otu_ids) if u]
    v_ids = [i for v, i in zip(v_counts, otu_ids) if v]
    
    u_nodes = _get_all_nodes(graph, u_ids)
    v_nodes = _get_all_nodes(graph, v_ids)
    
    u_sg = graph.subgraph(u_nodes)
    v_sg = graph.subgraph(v_nodes)
    
    unique_edges = set(u_sg.edges()).symmetric_difference(set(v_sg.edges()))
    
    total_sg = graph.subgraph(u_nodes | v_nodes)
    unique_sg = graph.subgraph(set(flatten(unique_edges)))

    return unique_sg.size(weight='weight') / total_sg.size(weight='weight')

In [16]:
unweighted_unifrac_uag(s1, s2, otu_ids, g)


Out[16]:
0.7272727272727273

In [ ]: